TREES

BBC’s Top 100 Influential Women of 2020

Sunburst Chart

Photo of Nemonte Nenquimo

Photo of Nemonte Nenquimo

And now, as a woman, as a mother, as a water protector and a forest defender,
I want you to join us in our fight to defend our way of life, our forests and our planet…
— Nemonte Nenquimo


Sunburst charts are visually appealing, interactive charts well suited to hierarchical data. Each level of the hierarchy is represented by one ring, with the innermost ring representing the root node and the hierarchy moving outwards from it. Each ring is divided into segments, one per category, placed according to their hierarchical relationship to the parent slice. The size of each segment can either be divided equally under its parent node or be made proportional to a value.

Ingest

name, category, country, role

# Relative path of the source CSV.
df_file_path <- "archetypes/top-100-influential-women/top-100-influential-women.csv"

# Load the CSV with a header row, keeping text columns as character vectors
# (not factors) and marking strings as UTF-8.
df <- read.csv(
  file = df_file_path,
  header = TRUE,
  stringsAsFactors = FALSE,
  encoding = "UTF-8"
)

# Print the raw table for inspection.
df

Wrangle

enrich with continent to create hierarchy

# Start from the ingested table, keeping only the columns used downstream.
df_wrangle <- select(df, name, category, country, role)

# Look up the UN region name for every country; unmatched countries come back
# as NA.
df_wrangle$region <- countrycode(
  df_wrangle$country,
  origin = "country.name",
  destination = "un.region.name"
)

df_wrangle <- df_wrangle %>%
  mutate(
    # Non-matches fall back to the country name itself.
    # Alternative (disabled): a generic "World" entry via
    #   replace_na(list(region = "World"))
    region = ifelse(is.na(region), country, region),
    # Row id, plus a constant size of 1 so later aggregations count rows.
    ID = row_number(),
    SIZE = 1,
    # Suffix the country with the row id so every country vertex is unique
    # (avoids duplicate vertices in the graph). This must come after `region`
    # so the fallback above uses the un-suffixed country name.
    country = paste0(country, "_", ID)
  )

df_wrangle

Wrangle, part II

create graph hierarchy

# Rows whose category is "Top 100" are left out of the edge tables; the string
# "Top 100" is used below as the root vertex instead.
df_filtered <- df_wrangle %>% filter(category != "Top 100")

# ---- Edges ------------------------------------------------------------------
# Every edge table sums SIZE per group and ends up with FROM / TO / SIZE.

# Level 0: root -> category.
df_edges_0 <- setNames(
  aggregate(
    x = df_filtered$SIZE,
    by = list(df_filtered$category),
    FUN = sum
  ),
  c("TO", "SIZE")
)
df_edges_0$FROM <- "Top 100"

# Level 1: category -> (row-unique) country.
df_edges_1 <- setNames(
  aggregate(
    x = df_filtered$SIZE,
    by = list(df_filtered$category, df_filtered$country),
    FUN = sum
  ),
  c("FROM", "TO", "SIZE")
)

# Level 2: (row-unique) country -> name.
df_edges_2 <- setNames(
  aggregate(
    x = df_filtered$SIZE,
    by = list(df_filtered$country, df_filtered$name),
    FUN = sum
  ),
  c("FROM", "TO", "SIZE")
)

# Stack the three levels (rbind matches data-frame columns by name), then put
# the columns in edge-list order.
df_edges <- rbind(df_edges_0, df_edges_1, df_edges_2)[c("FROM", "TO", "SIZE")]
df_edges

# Every vertex mentioned by an edge, in one column; used for validation if
# needed.
df_node_list <- data.frame(NODE = c(df_edges$FROM, df_edges$TO))
#df_node_list

# ---- Nodes ------------------------------------------------------------------
# Each node table carries NODE, SIZE and COLOR (the category used for fill).

# Category-level nodes, aggregated over the *unfiltered* table so a
# "Top 100" category row, if present, is included.
# NOTE(review): the root vertex "Top 100" presumably comes from such a row --
# confirm against the data.
df_nodes_1 <- setNames(
  aggregate(
    x = df_wrangle$SIZE,
    by = list(df_wrangle$category),
    FUN = sum
  ),
  c("NODE", "SIZE")
)
df_nodes_1$COLOR <- df_nodes_1$NODE

# Country-level (intermediate) nodes, coloured by their category.
df_nodes_2 <- setNames(
  aggregate(
    x = df_filtered$SIZE,
    by = list(df_filtered$country, df_filtered$category),
    FUN = sum
  ),
  c("NODE", "COLOR", "SIZE")
)

# Name-level (leaf) nodes, coloured by their category.
# df_nodes_3 <- df_filtered %>% select(name, category, SIZE)
df_nodes_3 <- setNames(
  aggregate(
    x = df_filtered$SIZE,
    by = list(df_filtered$name, df_filtered$category),
    FUN = sum
  ),
  c("NODE", "COLOR", "SIZE")
)

# Stack all node levels.
df_nodes <- rbind(df_nodes_1, df_nodes_2, df_nodes_3)
df_nodes

# Optional consistency checks between the node table and the edge endpoints.
# Nodes that no edge refers to:
test_1 <- anti_join(df_nodes, df_node_list, by = "NODE")
#test_1
# Edge endpoints that have no node entry:
test_2 <- anti_join(df_node_list, df_nodes, by = "NODE")
#test_2


# Convert the edge list and node attributes to a graph object.
df_graph <- graph_from_data_frame(d = df_edges, vertices = df_nodes)

Plot

graph version

# Fill colour per category; the root ("Top 100") and the "LEAF" key are white.
category_palette <- c(
  "Top 100" = "#FFFFFF",
  "LEAF" = "#FFFFFF",
  "Creativity" = "#E8F5E9",
  "Identity" = "#E3F2FD",
  "Knowledge" = "#F3E5F5",
  "Leadership" = "#FCE4EC"
)

# Theme tweaks layered on top of theme_void(): font family, plot margins
# (in inches) and no legend.
theme_opts <- theme(
  text = element_text(family = "inconsolata"),
  plot.margin = unit(c(1.5, 1, 1, 1), "in"),
  legend.position = "none"
)

# Circular partition layout of the graph, weighted by SIZE.
v1 <- ggraph(
  df_graph,
  layout = "partition",
  circular = TRUE,
  weight = SIZE
) +
  # Arc segments: leaves use the "LEAF" palette entry, all other nodes use
  # their COLOR value; white outlines separate the segments.
  geom_node_arc_bar(
    aes(fill = ifelse(leaf, "LEAF", COLOR)),
    color = "#ffffff"
  ) +
  scale_fill_manual(values = category_palette) +
  # Borderless labels for nodes with depth < 2.
  geom_node_label(
    aes(label = name, filter = depth < 2),
    size = 6,
    label.size = NA,
    family = "inconsolata"
  ) +
  # Leaf names, rotated to the node's angle.
  geom_node_text(
    aes(filter = leaf, angle = node_angle(x, y), label = name),
    hjust = 0,
    size = 3,
    family = "inconsolata"
  ) +
  # clip = "off" lets text extend beyond the panel.
  coord_fixed(clip = "off") +
  theme_void() +
  theme_opts

# Render through girafe, rescaled to the full container width.
girafe(
  ggobj = v1,
  width_svg = 1280 / 72,
  height_svg = 720 / 72,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)
# Debugging aid: inspect the computed layer data.
# pg <- ggplot_build(v1)
# pg$data[[2]]
# pg$data[[3]]

interactive version

# Build one "-"-delimited path per row (category-country-role-name) and a
# constant weight column for the sunburst widget.
total <- df %>%
  select(category, country, role, name) %>%
  # "-" is the path separator below, so literal dashes are stripped from every
  # component of the path; a dash left in any of them would be read as an
  # extra hierarchy level. Columns are named explicitly (rather than selected
  # by position through the deprecated funs() helper) so a change in column
  # order cannot silently retarget the cleanup.
  mutate(
    category = gsub("-", "", category, fixed = TRUE),
    country = gsub("-", "", country, fixed = TRUE),
    role = gsub("-", "", role, fixed = TRUE),
    name = gsub("-", "", name, fixed = TRUE)
  ) %>%
  mutate(path = paste(category, country, role, name, sep = "-")) %>%
  # Keep rows 2 through 100, i.e. skip the first row.
  # NOTE(review): presumably the first row is the non-category "Top 100"
  # entry -- confirm against the data.
  slice(2:100) %>%
  # Constant weight per row; xtabs() below sums it per path.
  mutate(V2 = 2)

# Tabulate the weight per path (columns: path, Freq) and draw the interactive
# sunburst with a 9-colour "Set3" palette.
sund2b(
  data = data.frame(xtabs(V2 ~ path, total)),
  rootLabel = "continent",
  showLabels = TRUE,
  colors = list(range = RColorBrewer::brewer.pal(9, "Set3"))
)

References

citations for narrative and data sources